clear;
addpath('../Functions')

% Set path and list all .mat files
dataPath = '../Data/Data_wide_all/';
fileList = dir([dataPath, '*.mat']);

% Load factor data once
factors = load('../Data/factors.mat');
Z = factors.final_return_matrix;

% Initialize storage for risk premia and R2 estimates
lambdat_hat = cell(1, 5);
R2t_hat     = cell(1, 5);

% Loop over all .mat data files
for i = 1:length(fileList)%-1
    
    % Load individual equity data
    data = load([dataPath, num2str(1000 + i), '.mat']);
    X = data.df_all;

    merged = [];

    % Align observations on common dates
    common_dates = intersect(X(:, 1), Z(:, 1));

    for l = 1:length(common_dates)
        date = common_dates(l);
        rowsX = X(X(:, 1) == date, :);
        rowsZ = Z(Z(:, 1) == date, :);
        merged = [merged; [rowsX, rowsZ(:, 3:end)]];
    end

    % Aggregate returns into 27-row blocks using (1 + r) cumulative product
    [T, N] = size(merged);
    blockSize = 27;
    numBlocks = floor(T / blockSize);

    Y = NaN(numBlocks, N); % Preallocate
    Y(:, 1:2) = merged(1:blockSize:T, 1:2); % Retain date and time index

    for j = 3:N
        for b = 1:numBlocks
            idx = (b - 1) * blockSize + 1 : b * blockSize;
            Y(b, j) = -1 + prod(1 + merged(idx, j));
        end
    end

    % Extract factor and return data
    factors_sub = Y(:, [end - 6 : end - 1]);
    rf = Y(:, end);
    returns_sub = Y(:, 3:end - 7) - rf;  % excess returns

    % Run Fama-MacBeth-like cross-sectional regression
    factor_indices = [1, 3, 4, 5, 6];

    for j = 1:5
        f_idx = factor_indices(j);
        if f_idx ~= 4
            selected = 1:f_idx;
        else
            selected = [1:3, 6];  % skip 4th factor
        end

        % First-stage regression to estimate betas
        betahat{i,j} = ((factors_sub(:, selected)' * factors_sub(:, selected)) \ ...
                  (factors_sub(:, selected)' * returns_sub))';
        yy{i,j} = (mean(returns_sub, 1))';

    end

    disp(i);
end

%% Summarize risk premia estimates

for i = 1:length(fileList)-1

    for j = 1:5
        valid = (~isnan(betahat{i, j}(:,1))).*(~isnan(yy{i+1, j}(:,1)))>0;
        betas_valid = betahat{i,j}(valid, :);
        yy_valid = yy{i+1,j}(valid,1);

        % Second-stage regression to estimate lambda (risk premia)
     
        coeff = (betas_valid' * betas_valid) \ ...
                               (betas_valid' * yy_valid);
        lambdat_hat{j}(i, :) = coeff;
        yypred = betas_valid*coeff;
        R2t_hat{j}(i) = sum((yypred).^2)/sum(yy_valid.^2)*100;%
    end

    disp(i);

end



final = cell(7, 10); % 5 models × (mean, t-stat) = 10 columns
FF = [1, 3, 4, 5, 6];

for i = 1:5
    if FF(i) ~= 4
        mu = mean(lambdat_hat{i})' * 100*252;
        tstat = mean(lambdat_hat{i}) ./ std(lambdat_hat{i})*sqrt(length(lambdat_hat{i}));
        final(1:FF(i), 2 * i - 1) = num2cell(mu);
        final(1:FF(i), 2 * i) = num2cell(tstat);
    else
        mu = mean(lambdat_hat{i})' * 100*252;
        tstat = mean(lambdat_hat{i}) ./ std(lambdat_hat{i})*sqrt(length(lambdat_hat{i}));
        final([1:FF(i) - 1, 6], 2 * i - 1) = num2cell(mu);
        final([1:FF(i) - 1, 6], 2 * i) = num2cell(tstat);
    end
    final(7, 2 * i - 1) = num2cell(mean(R2t_hat{1,i}));  % Placeholder for R² if needed
end

% Format and wrap t-stats in LaTeX-style parentheses
final = cellfun(@(x) round(x, 2), final, 'UniformOutput', false);
final = cellfun(@num2str, final, 'UniformOutput', false);

for i = 1:5
    if FF(i) ~= 4
        final(1:FF(i), 2 * i) = cellfun(@(x) ['$(', x, ')$'], final(1:FF(i), 2 * i), 'UniformOutput', false);
    else
        final([1:FF(i) - 1, 6], 2 * i) = cellfun(@(x) ['$(', x, ')$'], final([1:FF(i) - 1, 6], 2 * i), 'UniformOutput', false);
    end
end

T = cell2table(final);

writetable(T, '../Daily.csv')
